I conduct an STM (Structural Topic Model) estimation on a sample of 11,919 online news articles about domestic politics from seven news providers: Bild.de, DIE WELT, FOCUS ONLINE, SPIEGEL ONLINE, Stern.de, ZEIT ONLINE, and Tagesschau.de. The articles are dated from 01.06.2017 to 31.12.2017 (the German federal election took place on 24 September 2017). I first extract all online articles using the EventRegistry API. Then all articles from the section “domestic policy” are filtered by checking the URL structure.

To discover the latent topics in the corpus, the structural topic modeling (STM) approach developed by Roberts (2016) is applied. The STM is an unsupervised machine learning approach that models topics as multinomial distributions of words and documents as multinomial distributions of topics, allowing the incorporation of external variables that affect both topical content and topical prevalence. I include the news provider as a control for both the topical content and the topical prevalence. Additionally, the month an article was published is included as a control for the topical prevalence. The number of topics is set to 35.

Distribution of articles

The figures below show the distribution of the number of articles from the respective news sources by date. There is a high peak around the federal election on September 24th.

# Save a bar chart of the number of articles per news site.
# The braced expression is passed positionally; since `filename` is named,
# it binds to ggsave's `plot` argument.
ggsave({
  btw %>%
  ggplot(aes(site)) +
  geom_bar(fill=col[1], alpha= .8) +
  labs(x="", y="Number of articles") +
  theme(
      legend.position   = "none"
    )
  
},
filename = "../figs/bar.png", device = "png", 
width = 6, height = 4,
        dpi = 600)
# NOTE(review): `plot1` is not defined in this chunk and is called twice —
# presumably created elsewhere or a copy-paste artifact; confirm.
plot1

plot1

# Save a timeline of the daily article count; the dashed vertical line marks
# the federal election on 2017-09-24.
ggsave({
  btw %>%
  group_by(date) %>%
  dplyr::summarise(obs = n()) %>%
  ggplot(aes(date, obs)) +
  geom_line(color=col[3]) +
  geom_vline(aes(xintercept=as.Date("2017-09-24")),
             linetype = 2, color=col[2]) +
  scale_color_manual(values = col) +
  labs(x="", y="number of articles",color="") +
  scale_x_date(breaks = date_breaks("1 month"), labels=date_format("%B", tz="CET")) +
  theme(
      legend.position   = "none",
      axis.title.x      = element_blank(),
      axis.text       = element_text(size = 8)
    )
},
filename = "../figs/timeline.png", device = "png",width = 6, height = 4,
dpi = 600
)
# NOTE(review): `plot1` is not defined in this chunk and is called twice —
# presumably a copy-paste artifact; confirm.
plot1

plot1

2. Model Results

Label topics

In order to improve readability and traceability, I assign a shorter name to the topics based on the most common words.

# Top-20 highest-probability (SAGE) words per topic, split by covariate level.
sagelabs <- sageLabels(stmOut, 20)

# Inspect the four most probable words per topic for the second covariate
# level; these word lists are the basis for the manual topic labels below.
sagelabs$cov.betas[[2]]$problabels[,1:4]
##       [,1]            [,2]              [,3]             [,4]           
##  [1,] "schulz"        "spd"             "martin"         "gabriel"      
##  [2,] "merkel"        "schulz"          "kanzlerin"      "duell"        
##  [3,] "fdp"           "jamaika"         "grünen"         "lindner"      
##  [4,] "auto"          "glyphosat"       "diesel"         "schmidt"      
##  [5,] "kohl"          "helmut"          "jahr"           "europa"       
##  [6,] "merkel"        "cdu"             "csu"            "union"        
##  [7,] "stimmen"       "wahlkrei"        "bundestagswahl" "veränderung"  
##  [8,] "seehof"        "csu"             "söder"          "bayern"       
##  [9,] "welt"          "deutschland"     "europa"         "politik"      
## [10,] "bundestag"     "parlament"       "abgeordnet"     "abgeordneten" 
## [11,] "schäubl"       "wolfgang"        "heimat"         "lammert"      
## [12,] "deutschland"   "flüchtling"      "jahr"           "welt"         
## [13,] "twitter"       "facebook"        "schröder"       "whatsapp"     
## [14,] "grünen"        "familiennachzug" "özdemir"        "flüchtling"   
## [15,] "berlin"        "deutschland"     "trump"          "muslim"       
## [16,] "spd"           "koalit"          "schulz"         "union"        
## [17,] "afd"           "weidel"          "welt"           "alic"         
## [18,] "euro"          "cdu"             "milliarden"     "fdp"          
## [19,] "ge"            "ten"             "be"             "le"           
## [20,] "berlin"        "jahr"            "amri"           "welt"         
## [21,] "bundeswehr"    "welt"            "mali"           "hubschraub"   
## [22,] "eu"            "flüchtling"      "italien"        "europäischen" 
## [23,] "niedersachsen" "cdu"             "afd"            "twesten"      
## [24,] "cdu"           "merkel"          "altmaier"       "politik"      
## [25,] "link"          "rot"             "linken"         "wagenknecht"  
## [26,] "steinmeier"    "maa"             "bundestag"      "bundespräsid" 
## [27,] "prozent"       "jahr"            "deutschland"    "menschen"     
## [28,] "prozent"       "spd"             "umfrag"         "cdu"          
## [29,] "afd"           "partei"          "petri"          "gauland"      
## [30,] "frauen"        "richter"         "urteil"         "prozess"      
## [31,] "bundeswehr"    "soldaten"        "leyen"          "franco"       
## [32,] "hamburg"       "polizei"         "gipfel"         "demonstranten"
## [33,] "türkei"        "erdogan"         "gabriel"        "deutschland"  
## [34,] "polizei"       "berlin"          "welt"           "polizisten"   
## [35,] "csu"           "bundestagswahl"  "cdu"            "wahl"
# Manually assigned short labels for the k = 35 topics, based on the most
# probable words inspected above.  Column 1 = topic number, column 2 = label.
topics <- matrix(c(1, "SPD, M.Schulz", 2, "A.Merkel vs. Schulz", 3, "Jamaica coalition", 4, "Diesel scandal", 
                   5, "H.Kohl", 6, "A.Merkel", 7, "Federal Election results", 8, "CSU, Seehofer, Söder", 
                   9, "Deportation, radicalization, refugees", 10, "Parliament (Bundestag)", 
                   11, "DIE LINKE, W.Schäuble", 12, "Refugees in GER", 13, "G.Schröder, IT Topics", 
                   14, "B90/Die Grüne", 
                   15, "D.Trump, antisemitism", 16, "Great coalition", 17, "AfD, in social media", 
                   18, "Budget statistics, Welfare policy", 
                   19, "Rubbish", 20, "Verfassungsschutz, Terror attacks", 21, "German armed forces, Drones, Mali", 
                   22, "EU policies, Weather", 23, "Elections in Niedersachsen", 24, "CDU, social media", 
                   25, "Political talkshows", 26, "Bundestag, Steinmeier, Maas", 
                   27, "Surveys, public statistics", 28, "Election polls", 
                   29, "AfD", 30, "Court sentences, NSU trial", 31, "German armed forces, v.d.Leyen", 
                   32, "G20 in Hamburg", 33, "Diplomatic relations w Turkey, Russia", 
                   34, "Terror attacks, polics reports", 35, "CDU/CSU H.Seehofer, Obergrenze"), ncol=2, byrow=TRUE)

# Lookup table mapping topic number -> "number: label".
# seq_len(nrow(topics)) replaces the external `k`, so the table stays correct
# even if the number of labels changes.
topics.df <- as.data.frame(topics) %>%
  transmute(topic_name = paste(V1, V2, sep=": "),
            topic = seq_len(nrow(topics)))

Next, we can assign a topic to each document (the topic with the highest posterior gamma).

# Document-topic probabilities (theta): one row per document-topic pair.
stmOut %>% tidy("theta") -> theta

# Keep, for each document, the single topic with the highest gamma.
# NOTE(review): rank() averages ties, so a document whose two top topics tie
# exactly would get rank 1.5 for both and be dropped by the filter — confirm
# this cannot occur / is acceptable.
top_topics <- theta %>% 
  group_by(document) %>%
  mutate(therank = rank(-gamma)) %>%
  filter(therank == 1) %>%
  select(- therank)

# Attach each article's top topic and its human-readable label to the article
# metadata; merge() keeps only rows present in both tables (inner join).
btw.2 <- btw %>%
  mutate(document = articleID) %>%
  merge(.,top_topics, by="document") %>%
  ## Combine with Topic label
  merge(., topics.df, by="topic") %>%
  mutate(allocation = 1) 
How is the top gamma value distributed across the corpus?
# Density of the per-document top-topic probability (gamma).
ggplot(btw.2, aes(x = gamma)) +
  geom_density(colour = col[3], fill = col[3], alpha = 0.7) +
  labs(x = "Distribution of gamma value")

Inspect the unclear topics

# Ten most representative articles (highest gamma) for topic 13.
# BUG FIX: top_n() without a ranking variable uses the LAST column — here
# "url" (the printed message "Selecting by url" confirms it), so the table
# was ranked alphabetically by URL.  Rank explicitly by gamma instead.
btw.2 %>% filter(topic==13) %>% select(title, gamma, url) %>%
  arrange(desc(gamma)) %>%
  top_n(10, gamma) %>%
  htmlTable::htmlTable(align="l")
## Selecting by url
title gamma url
1 Ex-Kanzler: Gerhard Schröders neue Liebe und seine alten Gewohnheiten 0.976619936560834 https://www.welt.de/politik/deutschland/article168903005/Gerhard-Schroeders-neue-Liebe-und-seine-alten-Gewohnheiten.html
2 Bundestagswahl 2017: Die Homescreens von Christian Lindner, Hubertus Heil, Peter Tauber 0.929666126198794 https://www.welt.de/politik/deutschland/article168238989/Das-verraten-die-Handy-Homescreens-ueber-deutsche-Politiker.html
3 Kristina Schröder für Begrenzung von Kanzler-Amtszeit 0.765020388823587 https://www.welt.de/politik/deutschland/article167407642/Kristina-Schroeder-fuer-Begrenzung-von-Kanzler-Amtszeit.html
4 Angela Merkel lässt sich im Wahlkampf von YouTubern interviewen - WELT 0.745966705094547 https://www.welt.de/politik/deutschland/article167522521/Warum-Angela-Merkel-die-Naehe-zu-den-YouTubern-sucht.html
5 Ex-Familienministerin: Kristina Schröder erwartet drittes Kind - WELT 0.544122276950742 https://www.welt.de/politik/deutschland/article169984391/Kristina-Schroeder-erwartet-ihr-drittes-Kind.html
6 Niedersachsen-Wahl: Doris Schröder-Köpf zittert um SPD-Landtagsmandat - WELT 0.488993875790351 https://www.welt.de/politik/deutschland/article169527258/Doris-Schroeder-Koepf-zittert-um-ihre-politische-Karriere.html
7 Niedersachsen-Wahl: Doris Schröder-Köpf zittert um SPD-Landtagsmandat - WELT 0.486666232015486 https://www.welt.de/politik/deutschland/article169527258/Doris-Schroeder-Koepf-bangt-um-ihre-politische-Karriere.html
8 Arbeitskampf: Jetzt ist die Wut in Görlitz - WELT 0.485700813303678 https://www.welt.de/politik/deutschland/article171197016/Jetzt-ist-die-Wut-in-Goerlitz.html
9 Roman Müller-Böhm (FDP): Das fragwürdige Geschäftsgebaren des jüngsten Abgeordneten - WELT 0.323464158406723 https://www.welt.de/politik/deutschland/article170413550/Das-fragwuerdige-Geschaeftsgebaren-des-juengsten-Abgeordneten.html
10 Wettbewerb: Bei der Kanzlerwahl fühlen sich Bewerber hintergangen - WELT 0.241454821933349 https://www.welt.de/politik/deutschland/article167643203/Bei-der-Kanzlerwahl-fuehlen-sich-Bewerber-hintergangen.html
# Ten most representative articles (highest gamma) for topic 10.
# BUG FIX: as above, top_n(10) implicitly ranked by the last column ("url");
# rank explicitly by gamma.
btw.2 %>% filter(topic==10) %>% select(title, gamma, url) %>%
  arrange(desc(gamma)) %>%
  top_n(10, gamma) %>%
  htmlTable::htmlTable(align="l")
## Selecting by url
title gamma url
1 Bundestag: Schäubles #Twitterverbot erregt Abgeordneten-Gemüter - WELT 0.82576338456062 https://www.welt.de/politik/deutschland/article170909167/Schaeubles-Twitterverbot-erregt-Abgeordneten-Gemueter.html
2 Wahlkampftaktik der Kanzlerin: SPD macht Merkel schwere Vorwürfe - WELT 0.811860582817037 https://www.welt.de/politik/deutschland/article169988019/SPD-macht-Merkel-schwere-Vorwuerfe.html
3 Angela Merkel: SPD macht der Kanzlerin schwere Vorwürfe - WELT 0.81079645868872 https://www.welt.de/politik/deutschland/article169988019/SPD-wirft-Merkel-Schuetzenhilfe-fuer-die-AfD-vor.html
4 Abstimmung im Bundestag: Keine Regierung, aber "Jamaika" funktioniert bereits - WELT 0.785815819797064 https://www.welt.de/politik/deutschland/article169988475/Keine-Regierung-aber-Jamaika-funktioniert-bereits.html
5 Regierung im Kreuzverhör: SPD-Antrag scheitert an "Jamaika" - WELT 0.775337759084737 https://www.welt.de/politik/deutschland/article169988475/SPD-Antrag-scheitert-an-Jamaika.html
6 Bundestag: AfD und Linkspartei kritisieren automatische Diätenerhöhung - WELT 0.604279305641221 https://www.welt.de/politik/deutschland/article171532121/AfD-und-Linkspartei-kritisieren-automatische-Diaetenerhoehung.html
7 Hermann Otto Solms: "Größe des Bundestags gefährdet Ansehen und Arbeitsfähigkeit" - WELT 0.583689856294287 https://www.welt.de/politik/deutschland/article169983673/Groesse-des-Bundestags-gefaehrdet-Ansehen-und-Arbeitsfaehigkeit.html
8 Deutscher Bundestag ohne Fachausschüsse: "Die Abgeordneten haben nichts zu tun" - WELT 0.543241819059454 https://www.welt.de/politik/deutschland/article171196415/Die-Abgeordneten-haben-nichts-zu-tun.html
9 Lohnentwicklung: Neuer Bundestag beschließt Diätenerhöhung für Abgeordnete - WELT 0.442483007814426 https://www.welt.de/politik/deutschland/article171557256/Neuer-Bundestag-beschliesst-Diaetenerhoehung-fuer-Abgeordnete.html
10 Jürgen Trittin: "AfD-Claqueure ändern meine Haltung nicht" - WELT 0.367369986809267 https://www.welt.de/politik/deutschland/article171639958/War-Ihnen-der-Applaus-der-AfD-peinlich-Herr-Trittin.html

The plotQuote function allows us to inspect the most common words of a topic for each covariate level. Here I check topic 3 (Jamaica coalition).

topic <- 3

# Show the topic's most probable words for each of the seven covariate
# (news-site) levels, one quote panel per level.
plotQuote(vapply(
  1:7,
  function(lvl) paste(sagelabs$cov.betas[[lvl]]$problabels[topic, ], collapse = "\n"),
  character(1)
))

3.1. Topic proportions

In order to get an initial overview of the results, Figure displays the topics ordered by their expected frequency across the corpus (left side of the Figure) and the expected proportion of a topic in public media minus the expected proportion of topic use in private media (right side of the Figure). Thus topics more associated with public media appear to the right of zero. To assign a label to each topic, I looked at the most frequent words in that topic and the most representative articles

Here, I create a data frame that contains the column means of theta (per topic and per covariate level).
# Expected topic proportions across the whole corpus: the mean of theta over
# all documents, paired with the "number: label" topic name.
# (The original built this via as.data.frame(colMeans(...)) + mutate, which
# left a stray unnamed duplicate column; only `frequency` and `topic` are
# used downstream.)
frequency <- data.frame(
  frequency = colMeans(stmOut$theta),
  topic = paste(topics[,1], topics[,2], sep=": ")
)

# Expected topic proportion per news site: mean theta per covariate level
# (betaindex) for each topic.  Built with lapply + do.call(rbind) instead of
# growing the data frame with rbind() inside a loop.
freq <- do.call(rbind, lapply(seq_len(k), function(i) {
  data.frame(
    freq  = tapply(stmOut$theta[, i], stmOut$settings$covariates$betaindex, mean),
    site  = stmOut$settings$covariates$yvarlevels,
    topic = i
  )
}))

# Attach the readable topic label and the corpus-wide expected frequency
# (used below to order the topics consistently across facets).
freq <- freq %>%
  left_join(., topics.df, by = "topic") %>%
  mutate(topic = topic_name) %>%
  left_join(., frequency %>% select(topic, frequency),
            by = "topic")

Next, we can plot the expected proportion of topic use in the overall corpus vs. the expected proportion of topic use for each medium.

# Expected topic proportion across the whole corpus, topics ordered by size.
p1 <- frequency %>%
  ggplot(aes(x = reorder(topic, frequency), y = frequency)) +
  geom_col(fill = col[1], alpha = 0.8) +
  coord_flip() +
  labs(x = "", y = "expected frequency") +
  theme(
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 11),
    axis.title  = element_text(size = 10)
  )

p1

# Expected topic proportions split by news site, one facet per site; topics
# are ordered by their corpus-wide expected frequency.
p2 <- freq %>%
  ggplot(aes(x = reorder(topic_name, frequency), y = freq)) +
  geom_col(fill = col[3]) +
  coord_flip() +
  facet_wrap(~site, ncol = 7) +
  theme(
    axis.text.y = element_text(size = 11),
    axis.title  = element_text(size = 10)
  ) +
  labs(x = "", y = "expected frequency")

p2 

3.2. Difference in topic prevalence

To identify which of these differences is significant, the conditional expectation of topic prevalence for given document characteristics can be estimated. More specifically, I estimate a linear model, where the documents are observations, the dependent variable is the posterior probability of a topic and the covariates are the metadata of documents (see equation below).

\[ \theta_d=\alpha+\beta_1x_{ownership}+\beta_2x_{month}+\epsilon \]

The estimateEffect() function uses the method of composition to incorporate uncertainty in the dependent variable, repeatedly drawing sets of topic proportions from the variational posterior and computing the coefficients as the average over all results.

# Regress the proportions of all k topics on news site plus a spline of the
# publication month; uncertainty = "None" ignores variability in the
# estimated topic proportions (fastest option).
effect <- estimateEffect(c(1:k) ~site+s(month), stmOut, 
                         metadata = out$meta, uncertainty = "None")

Here, I create a dataframe that contains the results of the estimation.

# For every topic, simulate coefficient vectors from the asymptotic normal of
# each sub-model and summarise them into a regression-style table
# (estimate, SE, t value, two-sided p value).
tables <- vector(mode="list", length = length(effect$topics))

for (i in seq_along(effect$topics)) {
  # 500 draws per sub-model, stacked into one matrix of coefficient draws.
  sims <- lapply(effect$parameters[[i]], function(x) stm:::rmvnorm(500, x$est, x$vcov))
  sims <- do.call(rbind, sims)
  est <- colMeans(sims)
  se <- sqrt(apply(sims, 2, stats::var))
  tval <- est/se
  # Residual degrees of freedom: observations minus estimated coefficients.
  rdf <- nrow(effect$data) - length(est)
  p <- 2*stats::pt(abs(tval), rdf, lower.tail = FALSE)
  topic <- i

  coefficients <- cbind(topic, est, se, tval, p)
  # BUG FIX: take the coefficient names from topic i's own parameters
  # (was hard-coded to [[1]], which is only correct if all topics share the
  # exact same design — make it robust regardless).
  rownames(coefficients) <- attr(effect$parameters[[i]][[1]]$est, "names")
  colnames(coefficients) <- c("topic", "Estimate", "Std. Error", "t value", "p")
  tables[[i]] <- coefficients
}

# Collect all per-topic tables into one data frame of regression results.
out1 <- list(call=effect$call, topics=effect$topics, tables=tables)

coeff <- as.data.frame(do.call(rbind,out1$tables))

# Tidy the parameter names (strip the "site" prefix, translate the spline
# basis terms into month labels) and flag significant coefficients.
# NOTE: the column name "signifcant" (sic) is kept because the plotting code
# below references that exact spelling.
coeff <- coeff %>% 
  mutate(parameter = rownames(coeff),
         parameter = gsub("site", "", parameter),
         parameter = ifelse(parameter == "s(month)1", "1_July", parameter),
         parameter = ifelse(parameter == "s(month)2", "2_August", parameter),
         parameter = ifelse(parameter == "s(month)3", "3_September", parameter),
         parameter = ifelse(parameter == "s(month)4", "4_October", parameter),
         parameter = ifelse(parameter == "s(month)5", "5_November", parameter),
         parameter = ifelse(parameter == "s(month)6", "6_December", parameter),
         # BUG FIX: the threshold was 0.5, but the plot legend claims the
         # 5% level — use 0.05.
         signifcant = ifelse(p <= 0.05,"yes","no")) %>%
  left_join(., topics.df, by="topic")

The following figure shows the regression results for each news page. The coefficients indicate the deviation from the base value of Bild.de.

# Site coefficients per topic, one facet per news site, bars coloured by the
# significance flag.  Coefficients are deviations from the reference site
# (Bild.de, the dropped factor level).
p1 <- coeff %>% 
  filter(parameter %in% stmOut$settings$covariates$yvarlevels) %>%
  ggplot(aes(x = reorder(topic_name,topic, decreasing=F), y = Estimate, fill=factor(signifcant))) +
  geom_col() +
  scale_fill_manual(values = col[c(2,1)]) +
  scale_x_discrete(position = "top") +
  coord_flip() +
  facet_wrap(~parameter, ncol = 8, scales = "free_x") +
  labs(x="", fill="significant at the 5% level") +
  theme(legend.position = "top", 
        axis.text.y = element_text(size=9),
        axis.text.x = element_text(angle=90)) 

p1

ggsave(plot = p1, filename = "../figs/estimates.png", device = "png",width = 10, height = 7,
dpi = 600)

Sentiment analysis

The idea of sentiment analysis is to determine, from online text data, the attitude of a writer toward a certain topic or the overall tonality of a document.

Lexical or “bag-of-words” approaches are commonly used. In this approach, the researcher provides pre-defined dictionaries (lists) of words associated with a given emotion, such as negativity. The target text is then deconstructed into individual words (or tokens), and the frequencies of words contained in a given dictionary are calculated.

1. Load sentiment dictionary.

SentimentWortschatz, or SentiWS for short, is a publicly available German-language resource for sentiment analysis, opinion mining etc. It lists positive and negative polarity bearing words weighted within the interval of [-1; 1] plus their part of speech tag, and if applicable, their inflections. The current version of SentiWS (v1.8b) contains 1,650 positive and 1,818 negative words, which sum up to 15,649 positive and 15,632 negative word forms incl. their inflections, respectively. It not only contains adjectives and adverbs explicitly expressing a sentiment, but also nouns and verbs implicitly containing one.

# Build the SentiWS sentiment dictionary: one row per (lower-cased) word with
# its polarity weight in [-1, 1].  Both polarity files are read and combined.
# (The original comments had the positive/negative labels swapped relative to
# the files actually read — corrected here.)
sent <- c(
  # negative words
  readLines("dict/SentiWS_v1.8c_Negative.txt",
            encoding = "UTF-8"),
  # positive words
  readLines("dict/SentiWS_v1.8c_Positive.txt",
            encoding = "UTF-8")
) %>% lapply(function(x) {
  # Split the tab-separated line; res[1] = "word|POS", res[2] = weight
  # (the third field, inflections, is deliberately dropped).
  res <- strsplit(x, "\t", fixed = TRUE)[[1]]
  return(data.frame(words = res[1], value = res[2],
                    stringsAsFactors = FALSE))
}) %>%
  bind_rows %>% 
  mutate(word = gsub("\\|.*", "", words) %>% tolower,
         value = as.numeric(value)) %>% 
  # Some word forms occur more than once (e.g. several POS tags);
  # take the mean weight per word.
  group_by(word) %>% summarise(value = mean(value)) %>% ungroup

2. Apply the dictionary to the articles.

We now take each word in each article and assign a sentiment value for that word. I only use articles that have been assigned a topic with a probability of over 90% (gamma > 0.9).

3. Calculate sentiment value

calculate weight:

\[ \text{weight} = \frac{\text{Total Articles, by site & topic}}{\text{Total Articles, by site}} \]

# Calculate weight: the share of a site's articles that fall into each topic,
#   weight = (articles per site & topic) / (articles per site).
# IDIOM: dplyr::add_count() replaces the verbose
# group_by() %>% add_tally() %>% ungroup() %>% mutate() %>% select() pattern
# and produces exactly the same columns.
btw.2 <- btw.2 %>%
  
  # how many articles per site & topic?
  add_count(site, topic, name = "count") %>%
  
  # how many articles per site?
  add_count(site, name = "totalcount") %>%
  
  # calculate weight
  mutate(weight = count/totalcount)

Group by article ID and calculate the mean sentiment value of each article.

# Average the word-level sentiment scores to a single value per article, then
# attach article metadata and compute the weighted sentiment score.
sentDF.red <- sentDF %>%

  # mean sentiment per article
  group_by(articleID) %>%
  summarise(sent_value = mean(sentiment)) %>%

  # left-join with the article/topic table (keeps all scored articles)
  left_join(., btw.2 %>% select(articleID, topic, topic_name,
                                title, url, site, gamma, weight),
            by="articleID") %>%
  mutate(weighted = sent_value*weight)

Plot Sentiment

1. by topic

# Mean unweighted sentiment per topic, topics ordered by sentiment value.
p1 <- sentDF.red %>%
  group_by(topic_name, topic) %>%
  summarise(sent_value = mean(sent_value)) %>%
  ggplot(aes(x = reorder(topic_name, sent_value), y = sent_value)) +
  geom_col(fill = col[3], alpha = 0.8) +
  coord_flip() +
  labs(
    x = "", y = "sentiment value (unweighted)",
    title = "sentiment value (unweighted)"
  ) +
  theme(axis.text.y = element_text(size = 10))

# Mean weighted sentiment per topic; ordered by the unweighted value so both
# panels share the same topic order, y-axis labels suppressed.
p2 <- sentDF.red %>%
  group_by(topic_name, topic) %>%
  summarise(
    weighted = mean(weighted),
    sent_value = mean(sent_value)
  ) %>%
  ggplot(aes(x = reorder(topic_name, sent_value), y = weighted)) +
  geom_col(fill = col[1], alpha = 0.8) +
  coord_flip() +
  labs(
    x = "", y = "sentiment value (weighted)",
    title = "sentiment value (weighted)"
  ) +
  theme(axis.text.y = element_blank())

p1 + p2

2. By site

# Mean unweighted sentiment per news site, ordered by sentiment value.
p1 <- sentDF.red %>%
  group_by(site) %>%
  summarise(sent_value = mean(sent_value)) %>%
  ggplot(aes(x = reorder(site, sent_value), y = sent_value)) +
  geom_col(fill = col[3], alpha = 0.8) +
  coord_flip() +
  labs(
    x = "", y = "sentiment value (unweighted)",
    title = "sentiment value (unweighted)"
  ) +
  theme(axis.text.y = element_text(size = 10))

# Mean weighted sentiment per site; ordered by the unweighted value so both
# panels share the same site order, y-axis labels suppressed.
p2 <- sentDF.red %>%
  group_by(site) %>%
  summarise(
    weighted = mean(weighted),
    sent_value = mean(sent_value)
  ) %>%
  ggplot(aes(x = reorder(site, sent_value), y = weighted)) +
  geom_col(fill = col[1], alpha = 0.8) +
  coord_flip() +
  labs(
    x = "", y = "sentiment value (weighted)",
    title = "sentiment value (weighted)"
  ) +
  theme(axis.text.y = element_blank())

p1 + p2

3. By site and topic

# Unweighted sentiment per topic, faceted by site (topics in numeric order).
sentDF.red %>%
  group_by(site, topic_name, topic) %>%
  summarise(sent_value = mean(sent_value)) %>%
  ggplot(aes(reorder(topic_name, topic), sent_value)) +
  geom_col(fill=col[3], alpha=0.8) +
  coord_flip() +
  facet_wrap(~site, ncol = 7) +
  labs(x="", y="sentiment value (unweighted)",
       title = "sentiment value (unweighted)") +
  theme(axis.text.y = element_text(size=12))

# NOTE(review): this ggsave captures the *unweighted* plot above but saves it
# as "sent_weighted.png", and the weighted plot below is never saved —
# the filename (or a second ggsave call) probably needs fixing; confirm.
ggsave(filename = "../figs/sent_weighted.png", device = "png",width = 12, height = 8,
dpi = 600)
# Weighted sentiment per topic, faceted by site (same topic ordering).
sentDF.red %>%
  group_by(site, topic_name, topic) %>%
  summarise(weighted = mean(weighted)) %>%
  ggplot(aes(reorder(topic_name, topic), weighted)) +
  geom_col(fill=col[1], alpha=0.8) +
  coord_flip() +
  facet_wrap(~site, ncol = 7) +
  labs(x="", y="sentiment value (weighted)",
       title = "sentiment value (weighted)") +
  theme(axis.text.y = element_text(size=12))

Radar plot

library(radarchart)

Select topics to be analyzed:

1: “SPD, M.Schulz” 2: “A.Merkel vs. Schulz” 3: “Jamaica coalition” 6: “A.Merkel” 8: “CSU, Seehofer, Söder” 14: “B90/Die Grüne” 16: “Great coalition” 17: “AfD, in social media”, 24: “CDU, social media” 29: “AfD”

# Topic numbers selected for the radar charts (party/candidate topics).
keep <- c(1:3, 6, 8, 14, 16, 17, 24, 29)

Unweighted

# Per-site mean (unweighted) sentiment for the selected topics, reshaped wide
# (one column per site) as required by chartJSRadar().
sentDF.red %>% 
  filter(topic %in% keep) %>%
  group_by(site, topic_name) %>%
  summarise(sent_value = mean(sent_value)) %>%
  spread(key=site, value=sent_value) %>%
  ungroup() -> radar
# Radar chart; scale bounds chosen for the unweighted value range.
chartJSRadar(scores = radar, labelSize = 8, 
             scaleStartValue = -0.2,
             maxScale = 0.1, 
             showToolTipLabel = TRUE,
             width = 8, height = 8)

Weighted

# Per-site mean *weighted* sentiment for the selected topics, reshaped wide
# (one column per site) as required by chartJSRadar().
sentDF.red %>% 
  filter(topic %in% keep) %>%
  group_by(site, topic_name) %>%
  summarise(weighted = mean(weighted)) %>%
  spread(key=site, value=weighted) %>%
  ungroup() -> radar
# Radar chart; much tighter scale bounds because weighted values are small.
chartJSRadar(scores = radar, labelSize = 8, 
             scaleStartValue = -0.005,
             maxScale = 0.002, 
             showToolTipLabel = TRUE,
             width = 8, height = 8)